* created by anna 

* this dofile: runs heterogeneity analysis using machine learning.  Calculates BLP, GATES and CLAN.  Repeats with 100 random splits into main and auxiliary samples.  Saves coefficients into datasets.  

*weights and variables in global set by analysis_master.do 

foreach m in $modellist {

	mat het_`m'_BLP = J(100,8,.)
	mat het_`m'_GATES = J(100,20,.)
		
	foreach v in $g {
	
		local mark = strpos("`v'","_std")	
		local v_short = substr("`v'",1,`mark' -1)
	
		mat het_`m'_`v_short' = J(100,20,.)
	}		
	
	preserve
	
	keep if $het_sample
	
	keep if `m' == 1
	keep if $het_outcome != .
		
	forvalues i = 1(1)$reps {

		*split into main and auxiliary sample
		
		capture drop rand* 
		capture drop list
		capture drop m 
		capture drop pred* 
		
		gen rand = runiform()
		gen rand2 = runiform()

		by lottery_union, sort: egen rand_lu = mean(rand2)
		
		sort rand_lu rand 
		gen list = _n 	
		
		gen m = mod(list,2) 
		
		*predict outcome in main sample, using auxiliary sample to train CART
				
		capture erase ${path_estimates}/${het_outcome}_`m'.mmat
		capture drop pred_${het_outcome}_`m'*
		
		quiet crtrees ${het_outcome} $g if m == 0, rforest gen(pred_${het_outcome}_`m') savetrees(${path_estimates}/${het_outcome}_`m'.mmat)

		predict pred_${het_outcome}_hat pred_${het_outcome}_stdp , opentrees(${path_estimates}/${het_outcome}_`m'.mmat)
				
		*demean predicted value
		
		egen pred_${het_outcome}_m = wtmean(pred_${het_outcome}_hat), weight(${weight}) by(m)
		gen pred_${het_outcome}_dm = pred_${het_outcome}_hat -  pred_${het_outcome}_m
			
		*estimate BLP
		
		quiet reg ${het_outcome} pred_${het_outcome}_dm if m == 1 [iweight = $weight],  cluster(village_code_correct) 
				
		mat het_`m'_BLP[`i',1] = _b[_cons]
		mat het_`m'_BLP[`i',2] = r(table)["ll","_cons"]
		mat het_`m'_BLP[`i',3] = r(table)["ul","_cons"]
		mat het_`m'_BLP[`i',4] = r(table)["pvalue","_cons"]
		
		mat het_`m'_BLP[`i',5] = _b[pred_${het_outcome}_dm]
		mat het_`m'_BLP[`i',6] = r(table)["ll","pred_${het_outcome}_dm"]
		mat het_`m'_BLP[`i',7] = r(table)["ul","pred_${het_outcome}_dm"]
		mat het_`m'_BLP[`i',8] = r(table)["pvalue","pred_${het_outcome}_dm"]
		
		*create quartile dummies 
		
		quiet xtile pred_${het_outcome}_q = pred_${het_outcome}_hat if m == 1, nquantiles(4)
		tab pred_${het_outcome}_q, gen(pred_${het_outcome}_q)
		
		quiet reg ${het_outcome} pred_${het_outcome}_q1-pred_${het_outcome}_q4 if m == 1 [iweight = $weight], nocons cluster(village_code_correct) 
		
		local j = 1
		
		forvalues q = 1(1)4 {
		
			mat het_`m'_GATES[`i',`j'] = _b[pred_${het_outcome}_q`q']
			mat het_`m'_GATES[`i',`j'+1] = r(table)["ll","pred_${het_outcome}_q`q'"]
			mat het_`m'_GATES[`i',`j'+2] = r(table)["ul","pred_${het_outcome}_q`q'"]
			mat het_`m'_GATES[`i',`j'+3] = r(table)["pvalue","pred_${het_outcome}_q`q'"]
	
			local j = `j' + 4
			
		}
		
		quiet lincom pred_${het_outcome}_q4 - pred_${het_outcome}_q1, level(95)
		
		mat het_`m'_GATES[`i',`j'] = r(estimate)
		mat het_`m'_GATES[`i',`j'+1] = r(lb)
		mat het_`m'_GATES[`i',`j'+2] = r(ub)
		mat het_`m'_GATES[`i',`j'+3] = r(p)
		
		foreach v in $g {
			
			local mark = strpos("`v'","_std")	
			local v_short = substr("`v'",1,`mark' -1)
		
			quiet reg `v' pred_${het_outcome}_q1-pred_${het_outcome}_q4 if m == 1 [iweight = $weight], cluster(village_code_correct) nocons
			
			local j = 1
		
			forvalues q = 1(1)4 {
		
				mat het_`m'_`v_short'[`i',`j'] = _b[pred_${het_outcome}_q`q']
				mat het_`m'_`v_short'[`i',`j'+1] = r(table)["ll","pred_${het_outcome}_q`q'"]
				mat het_`m'_`v_short'[`i',`j'+2] = r(table)["ul","pred_${het_outcome}_q`q'"]
				mat het_`m'_`v_short'[`i',`j'+3] = r(table)["pvalue","pred_${het_outcome}_q`q'"]
	
				local j = `j' + 4
			
			}
		
			quiet lincom pred_${het_outcome}_q4 - pred_${het_outcome}_q1, level(95)
		
			mat het_`m'_`v_short'[`i',`j'] = r(estimate)
			mat het_`m'_`v_short'[`i',`j'+1] = r(lb)
			mat het_`m'_`v_short'[`i',`j'+2] = r(ub)
			mat het_`m'_`v_short'[`i',`j'+3] = r(p)
		
		}
		
		
	}
	
	clear 
	
	svmat het_`m'_BLP 
	capture erase $path_estimates/${het_outcome}_`m'_BLP.dta 
	save $path_estimates/${het_outcome}_`m'_BLP 
	
	clear
	
	svmat het_`m'_GATES 
	capture erase $path_estimates/${het_outcome}_`m'_GATES.dta 
	save $path_estimates/${het_outcome}_`m'_GATES	
	
	clear
		
	foreach v in $g {
	
		local mark = strpos("`v'","_std")	
		local v_short = substr("`v'",1,`mark' -1)
		
		svmat het_`m'_`v_short'
				
		capture erase $path_estimates/${het_outcome}_`m'_`v_short'.dta 
		save $path_estimates/${het_outcome}_`m'_`v_short'
		
		clear
	
	}
	
	restore
			
}
